/*********** NOTES HEADER *********** 
Step 1: Idenfity unique patient-doctor relationship
Import claims data:
 Claims 	- Dr office visits: bcarrier_line_j.dta  
  		- Hospital Outpatient: outpatient_base_claims.dta
			Note that Hospital outpatient lists three physician types and we try to keep all 3 around 
	
Step 2: Merge SKA firm practice of doctor onto Dr-Patient file, 
Dataset is only large practices involved in vertical mergers (i.e., hospital buys practice)
In 2012 there are approximately 17% of physicians that merge on
 Import Dr. Information from SKA
   Files used 	- cleaned_ska_npi and cleaned_ska_upin
	Files created by Nathan - original data: "N:\MedicareClaims-P045601-BE\Work\ska\ska_verifiedList_ad.dta"
		File that creates the data is called: "N:\MedicareClaims-P045601-BE\Work\hosp_retro\Nathan\Code\C. Physician Cleaning"

Datasets used:
	(1) `yr'\bcarrier_line_j.dta
	(2) `yr'\outpatient_base_claims_j
	(3) cleaned_ska_`type'.dta
	(4) `yr'\mbsf_ab_summary.dta

Datasets created:		
	(1) bene_prov_`yr'.dta		
	(2) car_uniqprov_`yr'.dta
	(3) OP_uniqprov_`yr' 
	(4) carOP_uniqprov_`yr'
	(5) drDems`type'_`yr'
	(6) alldrInfo_`yr'
	(7) unique_alldr-bene__`yr'
	(8) taxid_`yr'
	(9) uniqtaxid05-12
	(10) unique_alldr-bene__`yr'
		
************************************/ 

set more off
capture log close
clear all
global path_mbsf "N:\MedicareClaims-P045601-BE\Work\"
global origData "N:\MedicareClaims-P045601-BE"
global dataIn "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-In\"
global dataOut "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out"
global dpath "N:\MedicareClaims-P045601-BE\Work\ay_data"
global skapath "N:\MedicareClaims-P045601-BE\Work\ska"

adopath +  N:\SIL-Common\estout
adopath +  N:\SIL-Common\outreg2
adopath +  N:\SIL-Common\estout
adopath +  N:\SIL-Common\outreg2

***READIN CARRIER FILES***
foreach yr of numlist 2005(1)2012 {
  use *TAX* BENE_ID CLM_ID CLM_THRU_DT LINE_NUM PRF_PHYSN_UPIN PRF_PHYSN_NPI PRVDR* ORG_NPI_NUM using "$origData\Work/`yr'\bcarrier_line_j.dta"

save "$dataOut\bene_prov_`yr'.dta", replace

 } 

***CREATE UNIQUE BENE-PROVIDER-MONTH FILES FROM CARRIER AND OP FILES (OMIT INPATIENT)***

foreach yr of numlist 2005(1)2012 {

*CARRIER FILES
*Aggregate across claims to unique provider
	use "$dataOut\bene_prov_`yr'.dta", replace
	  gen date=mdy(month(CLM_THRU_DT),1,year( CLM_THRU_DT))
	  format date %tdD_m_Y
	  drop CLM_ID LINE_NUM CLM_THRU_DT 
	  duplicates drop
	save "$dataOut\car_uniqprov_`yr'.dta", replace


*OUTPATIENT FILES
*Aggregate across claims to unique provider
	use BENE_ID CLM_THRU_DT PRVDR_NUM *NPI* *UPI* using "$origData\Work/`yr'\outpatient_base_claims_j"
	 gen date=mdy(month(CLM_THRU_DT),1,year( CLM_THRU_DT))
	 format date %tdD_m_Y
	 drop CLM_THRU_DT 
	 duplicates drop
	save "$dataOut\OP_uniqprov_`yr'.dta", replace
clear

*Outpatient files list three doctors: Attending (AT), operating (OP), and other (OT). 
	use "$dataOut\OP_uniqprov_`yr'.dta"
*This bit of code just renames the variables. Probably unnecessary 
	foreach var of varlist OP* AT* OT* {
	 local prefix=substr("`var'",1,2)
	 local var1=regexr("`var'","`prefix'","")
	 local var2=regexr("`var1'","_PHYSN_","")
	 rename `var' `var2'_`prefix'
	}
	
*APPEND OUTPATIENT FILES TO CARRIER FILES
	append using "$dataOut\car_uniqprov_`yr'.dta"
	save  "$dataOut\carOP_uniqprov_`yr'.dta"

*Trouble reshaping file. Below is a sort-of hardcode of the reshape command.
	use  "$dataOut\carOP_uniqprov_`yr'.dta"
*NPI_AT from outpatient file; PRF_PHYSN_NPI from carrier file
	 gen npi=NPI_AT  
	  replace npi=PRF_PHYSN_NPI if PRF_PHYSN_NPI~=""
*Going to keep OP and OT as well. Don't need it if already identified using AT
	  replace NPI_OP="" if npi==NPI_OP
	  replace NPI_OT="" if npi==NPI_OT
	 gen upin=UPIN_AT  
	  replace upin=PRF_PHYSN_UPIN if PRF_PHYSN_UPIN ~=""
	  replace UPIN_OP="" if upin==UPIN_OP
	  replace UPIN_OT="" if upin==UPIN_OT

*Generating OP and OT files to be stacked later
	foreach k in "OP" "OT" {
		preserve
	keep BENE_ID date ORG_NPI_NUM UPIN_`k' NPI_`k'
	 gen upin=UPIN_`k'
	 gen npi=NPI_`k'
	  drop if upin=="" & npi==""
	  drop UPIN_`k' NPI_`k'
	save "$dataOut\uniqprov_`k'docs_`yr'.dta"
	
 restore
 
	}

*Stacking
	drop UPIN_* NPI_* PRF_PHYSN_UPIN PRF_PHYSN_NPI
		append using "$dataOut\uniqprov_OPdocs_`yr'.dta"
		append using "$dataOut\uniqprov_OTdocs_`yr'.dta"

	save "$dataOut\uniqprov_alldrLong_`yr'.dta", replace
}
 
***MERGE THE PHYSICIAN FIRM INFORMATION ONTO DATASET OF PROVIDER-MONTH DATA ***
*Dealing with the fact that in early years, doctors are identified with UPIN but in 
*later years the doctors are identified with NPI
	foreach yr of numlist 2005(1)2012 {
		foreach type in "NPI" "UPIN" {
			use PRF_PHYSN_`type' PRVDR* using "$origData\Work/`yr'\bcarrier_line_j.dta"
				 gen year=`yr'
			 duplicates drop
			 local lowtype=lower("`type'")
				 rename PRF_PHYSN_`type' `lowtype' 
				 replace `lowtype' = "" if length(`lowtype')<6 
				 replace `lowtype' = "" if `lowtype'== "0000000000"
				 replace `lowtype' = "" if `lowtype'== "000000"
				 replace `lowtype' = "" if `lowtype' == "OTH000"

			drop if `lowtype'==""
			 bysort `lowtype' year: gen id=_n
			 bysort `lowtype' year: gen maxid=_N
			drop if maxid>5
			drop maxid
			
			reshape wide PRVDR_STATE_CD PRVDR_ZIP PRVDR_SPCLTY, i(`lowtype' year) j(id)
			sort `lowtype'
			
		save "$dataOut\drDems`type'_`yr'.dta", replace
	 }

	use "$dataOut\uniqprov_alldrLong_`yr'.dta", replace
	capture drop PRVDR_STATE_CD PRVDR_ZIP PRVDR_SPCLTY
	duplicates drop

	foreach type in "npi" "upin" {
		sort `type'
		 merge m:1 `type' using "$dataIn\cleaned_ska_`type'.dta", keepusing(ID) 
		 keep if _m ~= 2
		 drop _m

	rename ID `type'_ID
		replace npi = "" if npi == "0"
		replace npi = "" if npi == "0000000000"
		replace upin = "" if upin == "OTH000"

	capture drop year
		gen year=year(date)
	sort `type'

	merge m:1 `type' year using "$dataOut\drDems`type'_`yr'.dta"
	keep if _m ~= 2
	drop _m

	foreach var of varlist PRVDR* {
		local var1=regexr("`var'","PRVDR_","")
		rename `var' `var1'_`type'
	  }
	 }
	save "$dataOut\alldrInfo_`yr'.dta", replace
	}

***LIMIT TO UNIQUE PROVIDER-BENE-MONTH DATA ***

	foreach yr of numlist 2005(1)2012 {
		use "$dataOut\alldrInfo_`yr'.dta", replace
			 egen id = group( BENE_ID npi upin date), missing
		 bysort id: gen idflag=_n==1
		 keep if idflag==1
		 drop idflag
			 rename npi_ID vmgprac_npi
			 rename upin_ID vmgprac_npi
		save "$dataOut\unique_alldr-bene__`yr'.dta", replace
	} 


***Add Taxid***
	foreach yr of numlist 2005(1)2012 {
		use *TAX* PRF_PHYSN_UPIN PRF_PHYSN_NPI ORG_NPI_NUM using "$origData\Work/`yr'\bcarrier_line_j.dta"
			gen npi = PRF_PHYSN_NPI
			gen upin = PRF_PHYSN_UPIN 
		drop PRF_PHYSN_NPI PRF_PHYSN_UPIN 
			duplicates drop
		save "$dataOut\taxid_`yr'.dta", replace
		clear
	}

*Append datasets
	use "$dataOut\taxid_2005.dta"
		append using  "$dataOut\taxid_2006.dta" 
		append using  "$dataOut\taxid_2007.dta" 
		append using  "$dataOut\taxid_2008.dta" 
		append using  "$dataOut\taxid_2009.dta" 
		append using  "$dataOut\taxid_2010.dta" 
		append using  "$dataOut\taxid_2011.dta" 
		append using  "$dataOut\taxid_2012.dta" 
	duplicates drop

*Account for doctors that move around or merge
	sort npi upin TAX_NUM
		egen id=group(npi upin), missing

		*Missing variables create a lot of TAX-number combinations
		 drop if npi=="" & upin==""
		 drop if npi=="" & regexm(upin,"^NEW")
		 drop if npi=="" & regexm(upin,"^000")
		 drop if npi=="" & regexm(upin,"^UNKNOW")
		 drop if npi=="" & regexm(upin,"^EXE000")
		 drop if npi=="" & length(upin)<5
		 drop if regexm(npi,"^999999999") & upin==""
	save "$dataOut\uniqtaxid05-12.dta", replace

		gen validnpi= length(npi)>=8 & regexm(npi,"^999999999")!=1
		 replace validnpi=0 if regexm(npi,"^999999999") 
		gen validupin= length(upin)>=5 & regexm(upin,"^000")!=1 
		 replace validupin=0 if regexm(upin,"^NEW")
		 replace validupin=0 if regexm(upin,"^UNKNOW")
		 replace validupin=0 if regexm(upin,"^EXE000")
		 replace validupin=0 if inlist(upin,"X63496","X41880","X93511","W96045","W78567","X93810","A06733","W47335","W90714")
		 replace validupin=0 if inlist(upin,"W07698","W02375","W09357","X69147","X34453","W78882","W98154","D51356","E73997") 
		 replace validupin=0 if inlist(upin,"W01639","W10526","X49363","W20778","W00364","W21736","W33767") 
	drop if validnpi==0 & validupin==0

capture drop id2 validnpi validupin
bysort id: gen id2=_n
bysort id: gen id3=_N
drop if id3>5
drop id3
reshape wide TAX_NUM, i(npi upin id) j(id2)
drop id 
save "$dataOut\uniqtaxid05-12_reshape.dta", replace

use "$dataOut\uniqtaxid05-12_reshape.dta"

*Merge taxids onto Dr-Bene dataset
*NPI
	tempfile taxnpi
	tempfile taxupin
	preserve
	keep if npi~=""
		gen validnpi= length(npi)>=8 & regexm(npi,"^999999999")!=1
	drop if validnpi==0 
	drop upin ORG_NPI_NUM validnpi
	duplicates drop
		bysort npi: gen ct=_n
	reshape long TAX_NUM, i(npi ct) j(id)
	drop id ct
	drop if TAX==""
	duplicates drop
		bysort npi: gen id1=_n
		bysort npi: gen id2=_N
	drop if id2>7
	drop id2
	reshape wide TAX_NUM, i(npi) j(id1)

	save `taxnpi', replace
	clear
	restore

*UPIN
	preserve
		keep if upin~=""
		drop npi ORG_NPI_NUM 
			gen validupin= length(upin)>=5 & regexm(upin,"^000")!=1 
			*Deal with invalid UPIN
			 replace validupin=0 if regexm(upin,"^NEW")
			 replace validupin=0 if regexm(upin,"^UNKNOW")
			 replace validupin=0 if regexm(upin,"^EXE000")
			 replace validupin=0 if inlist(upin,"X63496","X41880","X93511","W96045","W78567","X93810","A06733","W47335","W90714")
			 replace validupin=0 if inlist(upin,"W07698","W02375","W09357","X69147","X34453","W78882","W98154","D51356","E73997") 
			 replace validupin=0 if inlist(upin,"W01639","W10526","X49363","W20778","W00364","W21736","W33767") 
		drop if validupin==0

		duplicates drop
		bysort upin: gen ct=_n
		reshape long TAX_NUM, i(upin ct) j(id)
		drop id ct
		duplicates drop
			bysort upin: gen id1=_n
			bysort upin: gen id2=_N
		drop if id2>7
		drop id2
		reshape wide TAX_NUM, i(upin) j(id1)

	foreach var of varlist TAX* {
		rename `var' upin`var' 
	}

sort upin 
save `taxupin', replace
clear

*Merge on taxid information onto doctor datasets
	foreach yr of numlist 2005(1)2012 {
		use "$dataOut\unique_alldr-bene__`yr'.dta", replace
			sort npi
			merge m:1 npi using `taxnpi'
			drop if _merge==2
			drop _merge

		 merge m:1 upin using `taxupin'
			drop if _merge==2
			drop _merge
		save "$dataOut\\taxidall_alldr-bene_`yr'.dta", replace
	}


foreach yr of numlist 2005(1)2012 {
use "$dataOut\\taxidall_alldr-bene_`yr'.dta" , replace
capture drop id
capture drop validupin
di "`yr'"

foreach var of varlist TAX* {
replace `var' = upin`var' if `var' ==""
di "`var'"
 }

rename NUM_npi NUM_NPI
rename npi NPI
foreach var of varlist *npi {
local upinvar = subinstr("`var'","npi","upin",.)
di "`upinvar'"
replace `var' = `upinvar' if `var' ==""
 }
rename upin UPIN
drop *upin upinTAX* 
save "$dataOut\\taxidall_alldr-bene_clean_`yr'.dta", replace
} 

**MERGE ON PATIENT DEMOGRAPHICS AND HEALTH OUTCOMES
*MEMBER DEMOGS
	foreach yr of numlist 2005(1)2012 {
		use BENE_ID STATE_CODE BENE_COUNTY_CD BENE_ZIP_CD BENE_BIRTH_DT BENE_RACE_CD BENE_SEX_IDENT_CD using "$path_mbsf\`yr'\mbsf_ab_summary.dta"     
			sort BENE_ID
			duplicates tag BENE_ID, gen(tag)
			di "`yr'"
			tab tag
			drop if tag>0
			drop tag

		 merge 1:m BENE_ID using "$dataOut/bene_icdDeath_all_`yr'.dta"
		save "$dataOut/mbrdemog`yr'.dta", replace
		clear
	}


*DOCTORS  

	foreach yr of numlist 2005(1)2012 {
		use "$dataOut/mbrdemog`yr'.dta"  
			di "`yr'"
			drop _merge
			sort BENE_ID date
		 merge 1:m BENE_ID date using  "$dataOut/taxidall_alldr-bene_clean_`yr'.dta"
		save "$dataOut/ANALYSIS_V1_`yr'.dta", replace
		clear
	}

 

